Load the data


In [1]:
import tcxparser
import pandas as pd
from datetime import datetime
from os import listdir
from os.path import join

In [2]:
def get_hr_data(tcx_path):
    """Load one TCX file into a DataFrame of heart-rate samples.

    Args:
        tcx_path: Path to a ``.tcx`` activity file.

    Returns:
        pandas.DataFrame with two columns: ``elapsed`` (whole seconds since
        the first sample in the file) and ``hr`` (the values returned by
        ``TCXParser.hr_values()``).
    """
    tcx = tcxparser.TCXParser(tcx_path)
    hr_values = tcx.hr_values()
    # Timestamps must look like 2016-01-31T12:34:56.000Z; anything else makes
    # strptime raise ValueError.
    time_values = [datetime.strptime(t, '%Y-%m-%dT%H:%M:%S.000Z') for t in tcx.time_values()]
    start = time_values[0] if time_values else None
    # total_seconds() rather than .seconds: .seconds is only the sub-day part
    # of the timedelta, so it wraps back to 0 for gaps of 24 hours or more.
    elapsed = [int((tv - start).total_seconds()) for tv in time_values]
    return pd.DataFrame(data={'elapsed': elapsed, 'hr': hr_values})

def load_data(dir_path):
    """Load every ``.tcx`` file in a directory, oldest activity first.

    The activity date is parsed (as ``%Y%m%d``) from the second
    underscore-separated field of each filename.

    Args:
        dir_path: Directory containing the ``.tcx`` files.

    Returns:
        A ``(data, dates)`` pair of equal-length lists: one DataFrame per
        file (see ``get_hr_data``) and the matching ``datetime`` for each,
        both ordered by date and then by filename.
    """
    tcx_filenames = [f for f in listdir(dir_path) if f.endswith('.tcx')]
    # listdir() returns entries in arbitrary order. Sort chronologically so
    # that activities[0] and the per-activity colour gradient are
    # reproducible from run to run.
    dated = sorted((datetime.strptime(f.split('_')[1], '%Y%m%d'), f) for f in tcx_filenames)
    data = [get_hr_data(join(dir_path, f)) for _, f in dated]
    dates = [d for d, _ in dated]
    return data, dates

In [3]:
# Load every .tcx activity from the local 'data' directory, with its date.
activities, activity_dates = load_data('data')

What does the data look like?


In [4]:
# The first loaded activity is the worked example for the rest of the notebook.
first_activity = activities[0]
first_activity.head()


Out[4]:
elapsed hr
0 0 95
1 1 94
2 7 91
3 9 88
4 21 91

In [5]:
# Last few samples of the same activity.
first_activity.tail()


Out[5]:
elapsed hr
482 3587 121
483 3589 121
484 3590 122
485 3591 122
486 3598 120

...and visualise the data with Bokeh


In [6]:
import bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.palettes import Blues9, Spectral9, RdYlGn5
from bokeh.models import BoxAnnotation
from bokeh.models import Range1d

In [7]:
# Load BokehJS so that show() renders plots in the notebook.
output_notebook()


Loading BokehJS ...

What does the first activity look like?


In [8]:
# Heart rate against elapsed seconds for the first activity.
p = figure(plot_width=800, tools='box_zoom,resize,reset,hover')

p.line(first_activity.elapsed, first_activity.hr, line_width=2)

show(p)


Out[8]:

<Bokeh Notebook handle for In[8]>


In [9]:
# Palette entries used to shade each phase of the session.
WALKING, JOGGING, RUNNING = Spectral9[0], Spectral9[1], Spectral9[6]

p = figure(plot_width=800, tools='box_zoom,resize,reset,hover')
p.line(first_activity.elapsed, first_activity.hr, line_width=2)

# (left, right, colour) of each shaded band, in elapsed seconds.
for _left, _right, _colour in [
    (0, 300, WALKING),
    (300, 600, JOGGING),
    (600, 3000, RUNNING),
    (3000, 3300, JOGGING),
    (3300, 3600, WALKING),
]:
    p.add_layout(BoxAnnotation(left=_left, right=_right, fill_alpha=0.1,
                               line_color=_colour, fill_color=_colour))

show(p)


Out[9]:

<Bokeh Notebook handle for In[9]>


In [10]:
# Settings shared by every figure built below.
TOOLS = 'box_zoom,resize,reset,hover'
WIDTH = 800
BAND_OPACITY = 0.1

# Band colours for the three intensities.
WALKING, JOGGING, RUNNING = Spectral9[0], Spectral9[1], Spectral9[6]

def plot_activity(activity):
    """Return a new figure showing one activity's heart rate against elapsed time.

    The figure uses the shared WIDTH and TOOLS settings and is not shown here;
    the caller decides when to call show().
    """
    fig = figure(tools=TOOLS, plot_width=WIDTH)
    fig.line(activity.elapsed, activity.hr, line_width=2)
    return fig

def add_intensity_band(p, left, right, colour):
    """Shade the vertical strip from x=left to x=right on figure p.

    The strip's fill and outline both use `colour`, at BAND_OPACITY fill alpha.
    """
    p.add_layout(
        BoxAnnotation(
            left=left,
            right=right,
            fill_alpha=BAND_OPACITY,
            line_color=colour,
            fill_color=colour
        )
    )

def add_intensity_bands(p):
    """Shade the five phases of the session on figure p.

    Walking 0-300 s, jogging 300-600 s, running 600-3000 s, jogging
    3000-3300 s and walking 3300-3600 s.
    """
    schedule = (
        (0, 300, WALKING),
        (300, 600, JOGGING),
        (600, 3000, RUNNING),
        (3000, 3300, JOGGING),
        (3300, 3600, WALKING),
    )
    for band_start, band_end, band_colour in schedule:
        add_intensity_band(p, band_start, band_end, band_colour)

Now all activities


In [11]:
# Overlay every activity on one figure; all lines share the default colour.
p = figure(plot_width=WIDTH, tools=TOOLS)

for activity in activities:
    p.line(activity.elapsed, activity.hr, line_width=2)

show(p)


Out[11]:

<Bokeh Notebook handle for In[11]>

What's going on? How do they vary over time?


In [12]:
p = figure(plot_width=WIDTH, tools=TOOLS)

# One shade of blue per activity, starting from the first palette entry.
# Indexing modulo the palette length (instead of popping from a 9-entry list)
# keeps this cell working with more than nine activities; shades then repeat.
for index, activity in enumerate(activities):
    line_colour = Blues9[index % len(Blues9)]
    p.line(activity.elapsed, activity.hr, line_width=2, line_color=line_colour)

show(p)


Out[12]:

<Bokeh Notebook handle for In[12]>


In [13]:
# Axis labels shared by the multi-activity plots.
X_LABEL = 'Elapsed Time (s)'
Y_LABEL = 'Heart Rate (BPM)'

def plot_activities_colourful(activities):
    """Plot every activity's heart rate on one labelled figure, one blue shade each.

    Args:
        activities: iterable of DataFrames with ``elapsed`` and ``hr`` columns.

    Returns:
        The Bokeh figure; it is not shown here.
    """
    p = figure(plot_width=WIDTH, tools=TOOLS, x_axis_label=X_LABEL, y_axis_label=Y_LABEL)
    for index, activity in enumerate(activities):
        # Walk the palette from its first entry and wrap around, so more
        # activities than palette entries no longer raises IndexError.
        line_colour = Blues9[index % len(Blues9)]
        p.line(activity.elapsed, activity.hr, line_width=2, line_color=line_colour)
    return p

def focus(p, start, end, hr_min, hr_max):
    """Restrict figure p to x in [start, end] and y in [hr_min, hr_max]."""
    horizontal = Range1d(start, end)
    p.x_range = horizontal
    vertical = Range1d(hr_min, hr_max)
    p.y_range = vertical

All together now!


In [14]:
# All activities with the intensity bands, limited to 0-3600 s and 70-170 BPM.
p = plot_activities_colourful(activities)
add_intensity_bands(p)
focus(p, 0, 3600, 70, 170)
show(p)


Out[14]:

<Bokeh Notebook handle for In[14]>

Zooming in on the first transition


In [15]:
p = plot_activities_colourful(activities)
add_intensity_bands(p)

p.title = 'Walking to Jogging'
# From 60 s before the 300 s band boundary to the end of the jogging band.
focus(p, 240, 600, 80, 140)

show(p)


Out[15]:

<Bokeh Notebook handle for In[15]>


In [16]:
p = plot_activities_colourful(activities)
add_intensity_bands(p)

p.title = 'Jogging to Running'
# From 60 s before the 600 s band boundary to 300 s after it.
focus(p, 540, 900, 115, 155)

show(p)


Out[16]:

<Bokeh Notebook handle for In[16]>


In [17]:
p = plot_activities_colourful(activities)
add_intensity_bands(p)

p.title = 'Running to Jogging'
# From 60 s before the 3000 s band boundary to the end of the jogging band.
focus(p, 2940, 3300, 135, 165)

show(p)


Out[17]:

<Bokeh Notebook handle for In[17]>


In [18]:
p = plot_activities_colourful(activities)
add_intensity_bands(p)

p.title = 'Jogging to Walking'
# From 60 s before the 3300 s band boundary to the end of the walking band.
focus(p, 3240, 3600, 110, 160)

show(p)


Out[18]:

<Bokeh Notebook handle for In[18]>

What are the steady state values?

First, isolate the data we care about


In [19]:
# Boolean mask for the last two minutes of walking (elapsed 180-300 s).
first_activity.elapsed.between(180, 300)


Out[19]:
0      False
1      False
2      False
3      False
4      False
5      False
6      False
7      False
8      False
9      False
10     False
11     False
12     False
13     False
14     False
15     False
16     False
17     False
18     False
19     False
20     False
21     False
22     False
23     False
24      True
25      True
26      True
27      True
28      True
29      True
       ...  
457    False
458    False
459    False
460    False
461    False
462    False
463    False
464    False
465    False
466    False
467    False
468    False
469    False
470    False
471    False
472    False
473    False
474    False
475    False
476    False
477    False
478    False
479    False
480    False
481    False
482    False
483    False
484    False
485    False
486    False
Name: elapsed, dtype: bool

In [20]:
# Index with the mask to keep only the samples inside that window.
first_activity[first_activity.elapsed.between(180, 300)]


Out[20]:
elapsed hr
24 185 95
25 198 93
26 211 93
27 213 92
28 215 92
29 228 93
30 240 96
31 248 99
32 258 98
33 260 98
34 266 97
35 268 94
36 270 92
37 271 93
38 273 94
39 277 97
40 280 99
41 282 98
42 291 101
43 299 104

then calculate the mean


In [21]:
def mean_hr_between(activity, start, end):
    """Average heart rate of the samples whose elapsed time is between start and end.

    Args:
        activity: DataFrame with ``elapsed`` and ``hr`` columns.
        start: Lower bound of the window, in elapsed seconds.
        end: Upper bound of the window, in elapsed seconds.

    Returns:
        The mean of ``hr`` over the rows selected by
        ``activity.elapsed.between(start, end)``.
    """
    in_window = activity.elapsed.between(start, end)
    return activity.loc[in_window, 'hr'].mean()

# Example: mean heart rate of the first activity over elapsed 120-180 s.
mean_hr_between(first_activity, 120, 180)


Out[21]:
94.28571428571429

How do the averages change over time?


In [22]:
# Mean heart rate over the whole running band (600-3000 s), one per activity.
running_mean = [mean_hr_between(a, 600, 3000) for a in activities]

p = figure(plot_width=WIDTH, plot_height=400, x_axis_type="datetime", tools=TOOLS)

p.title = 'Running Steady State'
p.xaxis.axis_label = 'Activity Date'
p.yaxis.axis_label = 'Average HR (BPM)'

# Spectral9[6] is the same palette entry as RUNNING.
p.circle(activity_dates, running_mean, size=10, color=Spectral9[6])

show(p)


Out[22]:

<Bokeh Notebook handle for In[22]>

How do the running and cool-down (jogging) steady states compare?


In [23]:
# Per-activity means over the last two minutes of the running band (2880-3000 s)
# and of the following jogging band (3180-3300 s).
running_steady = [mean_hr_between(a, 2880, 3000) for a in activities]
cooldown_steady = [mean_hr_between(a, 3180, 3300) for a in activities]

p = figure(plot_width=WIDTH, plot_height=400, x_axis_type="datetime", tools=TOOLS)

p.title = 'Running to Jogging'
p.xaxis.axis_label = 'Activity Date'
p.yaxis.axis_label = 'Average HR (BPM)'

# Spectral9[6] and Spectral9[1] are the same entries as RUNNING and JOGGING.
p.circle(activity_dates, running_steady, size=10, color=Spectral9[6])
p.circle(activity_dates, cooldown_steady, size=10, color=Spectral9[1])
show(p)


Out[23]:

<Bokeh Notebook handle for In[23]>

Now jogging and walking


In [24]:
# Per-activity mean over the last two minutes of the final walking band (3480-3600 s).
walking_steady = [mean_hr_between(a, 3480, 3600) for a in activities]

p = figure(plot_width=WIDTH, plot_height=400, x_axis_type="datetime", tools=TOOLS)

p.title = 'Jogging to Walking'
p.xaxis.axis_label = 'Activity Date'
p.yaxis.axis_label = 'Average HR (BPM)'

# cooldown_steady comes from the previous cell, which must have been run first.
p.circle(activity_dates, cooldown_steady, size=10, color=Spectral9[1])
p.circle(activity_dates, walking_steady, size=10, color=Spectral9[0])

show(p)


Out[24]:

<Bokeh Notebook handle for In[24]>

What do these averages look like on the activity?


In [25]:
# First activity only, limited to 2940-3300 s and 150-165 BPM.
p = plot_activity(first_activity)
focus(p, 2940, 3300, 150, 165)
show(p)


Out[25]:

<Bokeh Notebook handle for In[25]>


In [26]:
p = plot_activity(first_activity)
focus(p, 2940, 3300, 150, 165)

# NOTE: running_steady was a per-activity list in an earlier cell; from here on
# it is a single number for first_activity (mean over 2940-3000 s).
running_steady = mean_hr_between(first_activity, 2940, 3000)
# Horizontal reference line at the running mean.
p.line(x=[0,3600], y=[running_steady, running_steady], line_color=RUNNING, line_width=2)

# Same for the last minute of the jogging band (3240-3300 s).
jogging_steady = mean_hr_between(first_activity, 3240, 3300)
p.line(x=[0,3600], y=[jogging_steady, jogging_steady], line_color=JOGGING, line_width=2)

# Drop the horizontal grid lines; soften and dash the vertical ones.
p.ygrid.grid_line_color = None
p.xgrid.grid_line_alpha = 0.75
p.xgrid.grid_line_dash = [6, 4]

show(p)


Out[26]:

<Bokeh Notebook handle for In[26]>

Time taken to get 50% of the way from the running steady state to the jogging steady state?


In [27]:
# Midpoint between the two steady-state means for first_activity.
threshold = (running_steady + jogging_steady) / 2
threshold


Out[27]:
157.8125

In [28]:
# Same figure as the previous plot cell, plus the threshold as a dashed black line.
p = plot_activity(first_activity)
focus(p, 2940, 3300, 150, 165)

running_steady = mean_hr_between(first_activity, 2940, 3000)
p.line(x=[0,3600], y=[running_steady, running_steady], line_color=RUNNING, line_width=2)

# threshold was computed in the previous cell.
p.line(x=[0,3600], y=[threshold, threshold], line_color='black', line_width=2, line_dash=[8, 3])

jogging_steady = mean_hr_between(first_activity, 3240, 3300)
p.line(x=[0,3600], y=[jogging_steady, jogging_steady], line_color=JOGGING, line_width=2)


p.ygrid.grid_line_color = None
p.xgrid.grid_line_alpha = 0.75
p.xgrid.grid_line_dash = [6, 4]

show(p)


Out[28]:

<Bokeh Notebook handle for In[28]>

Let's use the first measurement below the threshold


In [29]:
# Samples from the jogging band after running (3000-3300 s) ...
jogging = first_activity[first_activity.elapsed.between(3000, 3300)]
# ... and the first few of them whose heart rate is below the threshold.
jogging[jogging.hr < threshold].head()


Out[29]:
elapsed hr
421 3052 157
422 3060 157
423 3068 157
424 3076 155
425 3084 156

In [30]:
# Elapsed time of the first sample below the threshold.
jogging[jogging.hr < threshold].iloc[0].elapsed


Out[30]:
3052

In [31]:
# Seconds after the 3000 s band boundary at which that sample was recorded.
threshold_hit_at = jogging[jogging.hr < threshold].iloc[0].elapsed - 3000
threshold_hit_at


Out[31]:
52

What does that look like?


In [32]:
p = plot_activity(first_activity)

# Horizontal lines: running mean, threshold (dashed black), jogging mean.
running_steady = mean_hr_between(first_activity, 2940, 3000)
p.line(x=[0,3600], y=[running_steady, running_steady], line_color=RUNNING, line_width=1)

p.line(x=[0,3600], y=[threshold, threshold], line_color='black', line_width=2, line_dash=[8, 3])

jogging_steady = mean_hr_between(first_activity, 3240, 3300)
p.line(x=[0,3600], y=[jogging_steady, jogging_steady], line_color=JOGGING, line_width=1)

# Vertical lines: the 3000 s band boundary, and the moment the threshold was
# crossed (dashed black).
hit_at = 3000 + threshold_hit_at
p.line(x=[3000,3000], y=[0, 180], line_color=RUNNING, line_width=1)
p.line(x=[hit_at, hit_at], y=[0, 180], line_color='black', line_width=2, line_dash=[8,4])

focus(p, 2940, 3300, 150, 165)
# Hide both grids so that only the reference lines remain.
p.ygrid.grid_line_color = None
p.xgrid.grid_line_color = None
show(p)


Out[32]:

<Bokeh Notebook handle for In[32]>

Generalise these into some reusable functions


In [33]:
def calc_threshold(start_value, end_value, percent):
    """Return the value lying `percent` of the way from start_value to end_value.

    `percent` is a fraction rather than a percentage: 0 gives start_value and
    0.5 gives the midpoint.
    """
    return start_value + percent * (end_value - start_value)

# Check: 0.5 should reproduce the midpoint threshold computed by hand earlier.
calc_threshold(running_steady, jogging_steady, 0.5)


Out[33]:
157.8125

In [34]:
def calc_threshold_offset(activity, start_time, end_time, percent):
    """Seconds after ``start_time`` at which heart rate first crosses a threshold.

    The threshold lies ``percent`` of the way from the mean heart rate over
    the 60 s up to ``start_time`` to the mean over the 60 s up to
    ``end_time``. The comparison follows the direction of the transition:
    when heart rate falls (or is flat) the first sample below the threshold
    is used; when it rises, the first sample above it.

    Args:
        activity: DataFrame with ``elapsed`` and ``hr`` columns.
        start_time: Elapsed time at which the transition begins.
        end_time: Elapsed time at which the transition window ends.
        percent: Fraction of the way between the two means (e.g. 0.5).

    Returns:
        ``elapsed`` of the first crossing sample minus ``start_time``.

    Raises:
        IndexError: if no sample between ``start_time`` and ``end_time``
            crosses the threshold.
    """
    initial_hr = mean_hr_between(activity, start_time - 60, start_time)
    final_hr = mean_hr_between(activity, end_time - 60, end_time)

    threshold = calc_threshold(initial_hr, final_hr, percent)

    transition = activity[activity.elapsed.between(start_time, end_time)]
    if final_hr > initial_hr:
        # Rising transition (e.g. walking to jogging): look for the first
        # sample above the threshold.
        crossed = transition[transition.hr > threshold]
    else:
        crossed = transition[transition.hr < threshold]

    if crossed.empty:
        # Same exception type as the bare .iloc[0] would raise, but with a
        # message that says what went wrong.
        raise IndexError(
            'no sample between {} and {} crosses the threshold {}'.format(
                start_time, end_time, threshold))

    first_sample = crossed.iloc[0]
    return first_sample.elapsed - start_time

# Check: should match the offset worked out step by step earlier.
calc_threshold_offset(first_activity, 3000, 3300, 0.5)


Out[34]:
52

How does Running to Jogging change over time?


In [35]:
# Offset at which each activity crosses the 50% threshold after the 3000 s boundary.
run_jog_offsets = [calc_threshold_offset(a, 3000, 3300, 0.5) for a in activities]
run_jog_offsets


Out[35]:
[52, 48, 48, 55, 71, 33, 49, 41]

In [36]:
p = figure(plot_width=WIDTH, plot_height=400, x_axis_type="datetime", tools=TOOLS)

# Title and label the figure like the other per-date plots so it can be read
# on its own.
p.title = 'Running to Jogging: Time to 50% Threshold'
p.xaxis.axis_label = 'Activity Date'
p.yaxis.axis_label = 'Time Taken (s)'

p.circle(activity_dates, run_jog_offsets, size=10)

show(p)


Out[36]:

<Bokeh Notebook handle for In[36]>

What about different percentages, IQ (interquartile) ranges, etc.?


In [37]:
# Fractions of the way between the two steady-state means at which to measure.
PERCENTAGES = [0.1, 0.25, 0.5, 0.75, 0.9]
PERCENTAGES


Out[37]:
[0.1, 0.25, 0.5, 0.75, 0.9]

In [38]:
# One row per activity, one column per entry of PERCENTAGES.
[
    [calc_threshold_offset(activity, 3000, 3300, pct) for pct in PERCENTAGES]
    for activity in activities
]


Out[38]:
[[12, 20, 52, 76, 116],
 [32, 32, 48, 80, 88],
 [6, 6, 48, 56, 65],
 [19, 37, 55, 91, 136],
 [26, 26, 71, 80, 117],
 [6, 15, 33, 56, 110],
 [6, 22, 49, 112, 148],
 [6, 24, 41, 59, 104]]

In [39]:
p = figure(plot_width=WIDTH, plot_height=400, x_axis_type="datetime", tools=TOOLS)

p.title = 'Threshold Change Over Time'
p.xaxis.axis_label = 'Activity Date'
p.yaxis.axis_label = 'Time Taken (s)'

# One palette colour per percentage, taken from the end of RdYlGn5 backwards.
# The unused `sizes` list has been dropped: every marker was drawn at a fixed
# size of 12 regardless of the value popped from it.
colours = list(RdYlGn5)

for pct in PERCENTAGES:
    colour = colours.pop()
    offsets = [calc_threshold_offset(a, 3000, 3300, pct) for a in activities]
    p.circle(activity_dates, offsets, size=12, color=colour)

show(p)


Out[39]:

<Bokeh Notebook handle for In[39]>